# DELETE BEFORE PUBLISHING
# This is just here so you can preview the styling on your local machine
# Injects the notebook's custom CSS classes (usecase-*) so that markdown
# cells styled with them render correctly during local development.
# The HTML string below is emitted verbatim into the notebook output.
from IPython.core.display import HTML
HTML("""
<style>
.usecase-title, .usecase-duration, .usecase-section-header {
padding-left: 15px;
padding-bottom: 10px;
padding-top: 10px;
padding-right: 15px;
background-color: #0f9295;
color: #fff;
}
.usecase-title {
font-size: 1.7em;
font-weight: bold;
}
.usecase-authors, .usecase-level, .usecase-skill {
padding-left: 15px;
padding-bottom: 7px;
padding-top: 7px;
background-color: #baeaeb;
font-size: 1.4em;
color: #121212;
}
.usecase-level-skill {
display: flex;
}
.usecase-level, .usecase-skill {
width: 50%;
}
.usecase-duration, .usecase-skill {
text-align: right;
padding-right: 15px;
padding-bottom: 8px;
font-size: 1.4em;
}
.usecase-section-header {
font-weight: bold;
font-size: 1.2em;
}
.usecase-subsection-header, .usecase-subsection-blurb {
font-weight: bold;
font-size: 1.2em;
color: #121212;
}
.usecase-subsection-blurb {
font-size: 1em;
font-style: italic;
}
</style>
""")
As a City of Melbourne council worker, I want to visualise and provide statistics on upcoming activities and planned works in entertainment and leisure, so that I can understand impact for my local area.
I also want to know which entertainment locations are projected as growth areas.
At the end of this use case you will understand what entertainment and leisure activities are happening in a local area
This means learning how to:
The City of Melbourne conducts a census of all local businesses every two years. The last published survey was in 2020; the next survey results are expected soon.
The CLUE datasets contain information on venues:
#Libraries to be installed
##!pip -q is to give less output
# NOTE: these are Jupyter shell magics ("!"), not plain Python — this cell
# only runs inside a notebook kernel. Re-run after changing environments.
!pip -q install sodapy
!pip -q install seaborn
!pip -q install pandas
!pip -q install matplotlib
!pip -q install numpy
!pip -q install nbconvert
!pip -q install keyboard
!pip -q install geopandas
!pip -q install requests
!pip -q install folium
#load libraries
import os
import io
import time
import keyboard
import warnings
# Silences pandas chained-assignment and deprecation warnings notebook-wide.
warnings.filterwarnings('ignore')
from datetime import datetime
import requests
import numpy as np
import pandas as pd
from sodapy import Socrata
from urllib.request import urlopen
import json
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
# NOTE(review): importing display/HTML from IPython.core.display is
# deprecated in recent IPython; prefer `from IPython.display import ...`.
from IPython.core.display import display, HTML
import geopandas as gpd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
# Jupyter magic: render matplotlib figures inline in the notebook.
%matplotlib inline
from matplotlib import style
style.use('ggplot')
#Socrata client connection
# SECURITY(review): the Socrata application token was hard-coded here and
# would be published with the notebook. Read it from the environment
# instead; the original token remains only as a local-run fallback and
# should be revoked/removed before publishing.
app_token = os.environ.get('SODAPY_APPTOKEN', '9UAGAvkem9fqXDhBLElapjazL')
client = Socrata('data.melbourne.vic.gov.au', app_token, None)
Load the CLUE blocks geographical boundary layer to link the CLUE information.
# Spatial layer used to map CLUE datasets to CLUE blocks.
domain = "data.melbourne.vic.gov.au"
geoJSON_Id = 'aia8-ryiq'  # dataset id for the CLUE Blocks layer
# Build the export URL and fetch/parse the GeoJSON payload.
GeoJSONURL = f'https://{domain}/api/geospatial/{geoJSON_Id}?method=export&format=GeoJSON'
with urlopen(GeoJSONURL) as response:
    clueblocks = json.load(response)
# Sample: inspect the first feature to see the data's shape.
clueblocks["features"][0]
{'type': 'Feature',
'properties': {'block_id': '662', 'clue_area': 'East Melbourne'},
'geometry': {'type': 'Polygon',
'coordinates': [[[144.9899553574, -37.8176128042],
[144.9899751639, -37.8175252287],
[144.9900850867, -37.8168872256],
[144.9880108573, -37.8166537105],
[144.9877992698, -37.816654024],
[144.9876430254, -37.8166542547],
[144.9860775351, -37.8164760542],
[144.9860542492, -37.8164731858],
[144.9853710501, -37.8163992503],
[144.985196446, -37.8163794828],
[144.9826415715, -37.8160938525],
[144.9825871855, -37.8163908529],
[144.982560267, -37.8165378587],
[144.9851378107, -37.8168244375],
[144.9851485344, -37.8167632769],
[144.9854357153, -37.8167956425],
[144.9854364507, -37.8167914483],
[144.9875823469, -37.8170310783],
[144.9879410231, -37.8170711275],
[144.9878969846, -37.8173467142],
[144.9888254215, -37.8174352545],
[144.9888147558, -37.8174981936],
[144.9899553574, -37.8176128042]]]}}
#Load Bar, tavern, pub patron capacity dataset via the Socrata API.
df_btp_capacity = pd.DataFrame.from_dict(client.get_all('mffi-m9yn'))
print(df_btp_capacity.shape)
# The API returns every field as a string — cast to proper dtypes.
integer_columns = ['census_year', 'block_id', 'property_id', 'base_property_id', 'number_of_patrons']
str_columns = ['street_address', 'clue_small_area', 'trading_name']
float_columns = ['x_coordinate', 'y_coordinate']
df_btp_capacity[integer_columns] = df_btp_capacity[integer_columns].astype(int)
df_btp_capacity[float_columns] = df_btp_capacity[float_columns].astype(float)
df_btp_capacity[str_columns] = df_btp_capacity[str_columns].astype(str)
# BUG FIX: dropna returns a NEW DataFrame; the original called it three
# times and discarded the result, so rows with missing coordinates or
# address were silently kept. Assign the result back (one call covers
# all three columns).
df_btp_capacity = df_btp_capacity.dropna(subset=['x_coordinate', 'y_coordinate', 'street_address'])
# Drop the geocoded point column — coordinates are already split out.
df_btp_capacity = df_btp_capacity.drop('geocoded_column',axis=1)
print(df_btp_capacity.shape)
print('\n',df_btp_capacity.info())
# Per-census-year subsets used later by the map overlay layers.
df_btp_capacity_y1=df_btp_capacity.query("census_year == 2018")
df_btp_capacity_y2=df_btp_capacity.query("census_year == 2019")
df_btp_capacity_y3=df_btp_capacity.query("census_year == 2020")
(4402, 12) (4402, 11) <class 'pandas.core.frame.DataFrame'> RangeIndex: 4402 entries, 0 to 4401 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 census_year 4402 non-null int32 1 block_id 4402 non-null int32 2 property_id 4402 non-null int32 3 base_property_id 4402 non-null int32 4 street_address 4402 non-null object 5 clue_small_area 4402 non-null object 6 trading_name 4402 non-null object 7 business_address 4402 non-null object 8 number_of_patrons 4402 non-null int32 9 x_coordinate 4382 non-null float64 10 y_coordinate 4382 non-null float64 dtypes: float64(2), int32(5), object(4) memory usage: 292.4+ KB None
df_btp_capacity_y1.head(5)
| census_year | block_id | property_id | base_property_id | street_address | clue_small_area | trading_name | business_address | number_of_patrons | x_coordinate | y_coordinate | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 3352 | 2018 | 6 | 578321 | 573333 | 2 Swanston Street MELBOURNE 3000 | Melbourne (CBD) | Beer De Luxe | 2 Swanston Street MELBOURNE 3000 | 1000 | 144.969942 | -37.817778 |
| 3353 | 2018 | 6 | 578324 | 573333 | 2 Swanston Street MELBOURNE 3000 | Melbourne (CBD) | Transport Hotel | Tenancy 29, Ground , 2 Swanston Street MELBOUR... | 600 | 144.969942 | -37.817778 |
| 3354 | 2018 | 6 | 578324 | 573333 | 2 Swanston Street MELBOURNE 3000 | Melbourne (CBD) | Transit Rooftop Bar | Tenancy 29, Level 2, 2 Swanston Street MELBOUR... | 200 | 144.969942 | -37.817778 |
| 3355 | 2018 | 6 | 578327 | 573333 | 2 Swanston Street MELBOURNE 3000 | Melbourne (CBD) | Zinc at Federation Square | Tenancy 18, 2 Swanston Street MELBOURNE 3000 | 1000 | 144.969942 | -37.817778 |
| 3356 | 2018 | 11 | 103984 | 103984 | 566-580 Flinders Street MELBOURNE 3000 | Melbourne (CBD) | UBar | 8 Flinders Street MELBOURNE 3000 | 128 | 144.955336 | -37.820711 |
#Load Cafe, restaurant, bistro seats dataset via the Socrata API.
df_crb = pd.DataFrame.from_dict(client.get_all('xt2y-tnn9'))
# The API returns every field as a string — cast to proper dtypes.
integer_columns = ['census_year', 'block_id', 'property_id', 'base_property_id', 'number_of_seats'
                  ,'industry_anzsic4_code']
str_columns = ['street_address', 'clue_small_area', 'trading_name','industry_anzsic4_description','seating_type']
float_columns = ['x_coordinate', 'y_coordinate']
df_crb[integer_columns] = df_crb[integer_columns].astype(int)
df_crb[float_columns] = df_crb[float_columns].astype(float)
df_crb[str_columns] = df_crb[str_columns].astype(str)
# BUG FIX: dropna returns a NEW DataFrame; the original discarded the
# result of all three calls, so no rows were actually removed. Assign
# the result back (one call covers all three columns).
df_crb = df_crb.dropna(subset=['x_coordinate', 'y_coordinate', 'street_address'])
#drop columns
df_crb = df_crb.drop('geocoded_column',axis=1)
print(df_crb.shape)
df_crb.head(5)
(56987, 14)
| census_year | block_id | property_id | base_property_id | street_address | clue_small_area | business_address | trading_name | industry_anzsic4_code | industry_anzsic4_description | seating_type | number_of_seats | x_coordinate | y_coordinate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2002 | 2 | 111467 | 103973 | 0 King Street MELBOURNE 3000 | Melbourne (CBD) | 469-479 King Street MELBOURNE 3000 | Melbourne Aquarium | 8921 | Zoological and Botanical Gardens Operation | Seats - Indoor | 113 | 144.957426 | -37.82223 |
| 1 | 2002 | 2 | 111467 | 103973 | 0 King Street MELBOURNE 3000 | Melbourne (CBD) | 469-479 King Street MELBOURNE 3000 | Melbourne Aquarium | 8921 | Zoological and Botanical Gardens Operation | Seats - Outdoor | 38 | 144.957426 | -37.82223 |
| 2 | 2002 | 4 | 103972 | 103972 | 363-397 Flinders Street MELBOURNE 3000 | Melbourne (CBD) | Vault 12, 387 Flinders Street MELBOURNE 3000 | Subway Sauna | 9539 | Other Personal Services n.e.c. | Seats - Indoor | 15 | 144.960985 | -37.82115 |
| 3 | 2002 | 4 | 103972 | 103972 | 363-397 Flinders Street MELBOURNE 3000 | Melbourne (CBD) | Vault 1, 363-367 Flinders Street MELBOURNE 3000 | Underworld Health & Fitness | 9111 | Health and Fitness Centres and Gymnasia Operation | Seats - Indoor | 20 | 144.960985 | -37.82115 |
| 4 | 2002 | 4 | 103972 | 103972 | 363-397 Flinders Street MELBOURNE 3000 | Melbourne (CBD) | Vault 1, 363-367 Flinders Street MELBOURNE 3000 | Underworld Health & Fitness | 9111 | Health and Fitness Centres and Gymnasia Operation | Seats - Outdoor | 18 | 144.960985 | -37.82115 |
#create data frames per year for some visuals
# (one subset per census year, matching the bar/tavern subsets above)
df_crb_y1=df_crb.query("census_year == 2018")
df_crb_y2=df_crb.query("census_year == 2019")
df_crb_y3=df_crb.query("census_year == 2020")
# Spot-check the most recent census year.
df_crb_y3.head(5)
| census_year | block_id | property_id | base_property_id | street_address | clue_small_area | business_address | trading_name | industry_anzsic4_code | industry_anzsic4_description | seating_type | number_of_seats | x_coordinate | y_coordinate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 50593 | 2020 | 1 | 611394 | 611394 | 545-557 Flinders Street MELBOURNE VIC 3000 | Melbourne (CBD) | 551 Flinders Street MELBOURNE VIC 3000 | Nandos Northbank | 4511 | Cafes and Restaurants | Seats - Indoor | 60 | 144.956514 | -37.820979 |
| 50594 | 2020 | 1 | 611394 | 611394 | 545-557 Flinders Street MELBOURNE VIC 3000 | Melbourne (CBD) | 551 Flinders Street MELBOURNE VIC 3000 | Nandos Northbank | 4511 | Cafes and Restaurants | Seats - Outdoor | 6 | 144.956514 | -37.820979 |
| 50595 | 2020 | 1 | 611394 | 611394 | 545-557 Flinders Street MELBOURNE VIC 3000 | Melbourne (CBD) | 547 Flinders Street MELBOURNE VIC 3000 | Tokyo Maki | 4511 | Cafes and Restaurants | Seats - Indoor | 44 | 144.956514 | -37.820979 |
| 50596 | 2020 | 1 | 611394 | 611394 | 545-557 Flinders Street MELBOURNE VIC 3000 | Melbourne (CBD) | 553 Flinders Street MELBOURNE VIC 3000 | Domino's Pizza | 4512 | Takeaway Food Services | Seats - Indoor | 12 | 144.956514 | -37.820979 |
| 50597 | 2020 | 1 | 611395 | 611395 | 561-581 Flinders Street MELBOURNE VIC 3000 | Melbourne (CBD) | 563 Flinders Street MELBOURNE VIC 3000 | Di Parsia Pasticceria Cafe | 4512 | Takeaway Food Services | Seats - Outdoor | 16 | 144.955901 | -37.821088 |
# Load the City Activities and Planned Works dataset directly as a
# GeoDataFrame via the Socrata geospatial GeoJSON export endpoint.
capw_url = ("https://data.melbourne.vic.gov.au/api/geospatial/"
            "txcy-uafv?method=export&format=GeoJSON")
df_capw = gpd.read_file(capw_url)
print(df_capw.shape)
df_capw.head(5)
(605, 10)
| start_date | location | activity_id | end_date | status | source_id | notes | classification | small_area | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-04-25T00:00:00 | 17-23 Wills StreetMELBOURNE VIC 3000 | SS-1076969-0-110168-ECW-Consent Extended-25042... | 2022-06-05T00:00:00 | Confirmed | ECW-2021-334 | ePathway Consent for works | Structures | Melbourne (CBD) | MULTIPOLYGON (((144.95675 -37.81092, 144.95682... |
| 1 | 2022-01-01T00:00:00 | Therry Street between Victoria Street and Eliz... | WO-728357-1554230-1554230-61-Closed-0101202200... | 2022-06-30T00:00:00 | CONFIRMED | 728357 | - | Traffic Management | Melbourne (CBD) | MULTIPOLYGON (((144.96154 -37.80669, 144.96155... |
| 2 | 2022-05-15T00:00:00 | Canning Street between Dryburgh Street and Shi... | RP-59095-193964-21212-COU-Confirmed-1505202200... | 2022-05-15T00:00:00 | CONFIRMED | 33104 | None | Reserved Parking | North Melbourne | MULTIPOLYGON (((144.94331 -37.79619, 144.94434... |
| 3 | 2022-05-07T00:00:00 | 111-149 Nicholson StreetCARLTON VIC 3053 | SS-1102147-0-107153-ECW-Consent Extended-07052... | 2022-06-02T00:00:00 | Confirmed | ECW-2022-24 | ePathway Consent for works | Structures | Carlton | MULTIPOLYGON (((144.97414 -37.79680, 144.97405... |
| 4 | 2022-03-31T00:00:00 | 24-30 Barkly PlaceCARLTON VIC 3053 | SS-1108795-0-100732-EHD-Permit Issued-31032022... | 2022-07-30T00:00:00 | CONFIRMED | EHD-2021-89/1 | Hoarding | Structures | Carlton | MULTIPOLYGON (((144.96157 -37.80292, 144.96137... |
#Convert to date
# Parse start_date once and derive date / year / month from the single
# parsed series (the original re-parsed the column on every line).
start_ts = pd.to_datetime(df_capw.start_date)
df_capw['start_dt'] = start_ts.dt.date
df_capw['start_year'] = start_ts.dt.year
df_capw['start_month'] = start_ts.dt.month
# Drop descriptive columns not needed for the analysis below.
df_capw = df_capw.drop(['location', 'source_id', 'small_area','notes', 'status'], axis=1)
df_capw.head(5)
| start_date | activity_id | end_date | classification | geometry | start_dt | start_year | start_month | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2022-04-25T00:00:00 | SS-1076969-0-110168-ECW-Consent Extended-25042... | 2022-06-05T00:00:00 | Structures | MULTIPOLYGON (((144.95675 -37.81092, 144.95682... | 2022-04-25 | 2022 | 4 |
| 1 | 2022-01-01T00:00:00 | WO-728357-1554230-1554230-61-Closed-0101202200... | 2022-06-30T00:00:00 | Traffic Management | MULTIPOLYGON (((144.96154 -37.80669, 144.96155... | 2022-01-01 | 2022 | 1 |
| 2 | 2022-05-15T00:00:00 | RP-59095-193964-21212-COU-Confirmed-1505202200... | 2022-05-15T00:00:00 | Reserved Parking | MULTIPOLYGON (((144.94331 -37.79619, 144.94434... | 2022-05-15 | 2022 | 5 |
| 3 | 2022-05-07T00:00:00 | SS-1102147-0-107153-ECW-Consent Extended-07052... | 2022-06-02T00:00:00 | Structures | MULTIPOLYGON (((144.97414 -37.79680, 144.97405... | 2022-05-07 | 2022 | 5 |
| 4 | 2022-03-31T00:00:00 | SS-1108795-0-100732-EHD-Permit Issued-31032022... | 2022-07-30T00:00:00 | Structures | MULTIPOLYGON (((144.96157 -37.80292, 144.96137... | 2022-03-31 | 2022 | 3 |
#Found there are records with obviously wrong far-future end dates
#(e.g. 2921-11-19 00:00:00) — exclude these. end_date is still an
#ISO-8601 string here, so the lexicographic comparison against
#'2065-01-01' is a valid date cutoff.
# .copy() avoids chained-assignment on a filtered view below.
df_capw = df_capw.loc[(df_capw['end_date'] < '2065-01-01')].copy()
#Convert to date — parse once, derive the parts from the parsed series.
end_ts = pd.to_datetime(df_capw.end_date)
df_capw['end_dt'] = end_ts.dt.date
df_capw['end_year'] = end_ts.dt.year
df_capw['end_month'] = end_ts.dt.month
df_capw.head(5)
print(df_capw.shape)
(600, 11)
df_capw.info()
<class 'geopandas.geodataframe.GeoDataFrame'> Int64Index: 600 entries, 0 to 604 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 start_date 600 non-null object 1 activity_id 600 non-null object 2 end_date 600 non-null object 3 classification 600 non-null object 4 geometry 600 non-null geometry 5 start_dt 600 non-null object 6 start_year 600 non-null int64 7 start_month 600 non-null int64 8 end_dt 600 non-null object 9 end_year 600 non-null int64 10 end_month 600 non-null int64 dtypes: geometry(1), int64(4), object(6) memory usage: 56.2+ KB
#Different types of event or activity
# Distinct classification values present in the planned-works data.
df_capw.classification.unique()
array(['Structures', 'Traffic Management', 'Reserved Parking', 'Event',
'Public Event', 'Private Event'], dtype=object)
#Range of years
# Distinct start years covered by the planned-works data.
df_capw.start_year.unique()
array([2022, 2018, 2021, 2019, 2020, 2023], dtype=int64)
# Pedestrian foot count data (Pedestrian Counting System - Monthly).
# The high limit pulls the full dataset in one request.
sensor_data_id = "b2ak-trbp"
raw_records = client.get(sensor_data_id, limit=6000000)
sensor_traffic = pd.DataFrame.from_records(raw_records)
print(f'\nThe shape of dataset is {sensor_traffic.shape}. \n')
print('Sample rows of this dataset: ')
sensor_traffic.head(5)
The shape of dataset is (4562230, 10). Sample rows of this dataset:
| id | date_time | year | month | mdate | day | time | sensor_id | sensor_name | hourly_counts | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2887628 | 2019-11-01T17:00:00.000 | 2019 | November | 1 | Friday | 17 | 34 | Flinders St-Spark La | 300 |
| 1 | 2887629 | 2019-11-01T17:00:00.000 | 2019 | November | 1 | Friday | 17 | 39 | Alfred Place | 604 |
| 2 | 2887630 | 2019-11-01T17:00:00.000 | 2019 | November | 1 | Friday | 17 | 37 | Lygon St (East) | 216 |
| 3 | 2887631 | 2019-11-01T17:00:00.000 | 2019 | November | 1 | Friday | 17 | 40 | Lonsdale St-Spring St (West) | 627 |
| 4 | 2887632 | 2019-11-01T17:00:00.000 | 2019 | November | 1 | Friday | 17 | 36 | Queen St (West) | 774 |
Add columns to traffic dataset
# Derive extra columns on the traffic dataset.
# Parse date_time once (the original parsed it twice).
parsed_dt = pd.to_datetime(sensor_traffic.date_time)
#Add date column
sensor_traffic['date'] = parsed_dt.dt.date
#Add day of week column (0=Monday .. 6=Sunday)
sensor_traffic['dow'] = parsed_dt.dt.day_of_week
#convert sensor_id to integer
sensor_traffic['sensor_id']=sensor_traffic['sensor_id'].astype(int)
#hour of day
sensor_traffic['hod']=sensor_traffic['time'].astype(int)
sensor_traffic['year']=sensor_traffic['year'].astype(int)
sensor_traffic['mdate']=sensor_traffic['mdate'].astype(int)
#Add column for day (5am to 5pm) or night (6pm to 4am) traffic
# NOTE(review): hourly_counts is still an object (string) column at this
# point, so day_counts/night_counts mix strings and int 0 — consider
# astype(int) on hourly_counts first; confirm downstream expectations.
sensor_traffic['day_counts'] = np.where(((sensor_traffic['hod']>4)
    & (sensor_traffic['hod']<18)), sensor_traffic['hourly_counts'] , 0)
sensor_traffic['night_counts'] = np.where(sensor_traffic['day_counts']==0,sensor_traffic['hourly_counts'], 0)
Load Covid dataset and merge with pedestrian traffic.
Detail on using this dataset with pedestrian traffic is available in the exploratory data analysis "Compare pedestrian traffic pre-COVID to now".
#download covid dataset (state-level COVID Data Hub extract)
url = "https://raw.githubusercontent.com/M3IT/COVID-19_Data/master/Data/COVID_Data_Hub_State.csv"
download = requests.get(url).content
df_hub= pd.read_csv(io.StringIO(download.decode('utf-8')))
#look at VIC data
options = ['Victoria']
# selecting rows based on condition
ds = df_hub[df_hub['administrative_area_level_2'].isin(options)]
# .copy() so the new columns below are added to an independent frame,
# not a view of df_hub.
ds_hub = ds[['date','confirmed','administrative_area_level_2']].copy()
#log transform
# NOTE(review): this applies log TWICE (log(log(x))), which yields -inf
# whenever confirmed == 1 (visible in the merged sample output).
# np.log or np.log1p may have been intended — confirm before changing,
# since confirmed_log feeds the correlation and grouped averages below.
ds_hub['confirmed_log']=np.log(np.log(ds_hub['confirmed']))
#check merge column is of same data type, convert if required
ds_hub['date'] = pd.to_datetime(ds_hub['date'], infer_datetime_format=True)
# BUG FIX: the original referenced an undefined name `sensor_ds` here,
# which raises NameError; the intent is to normalise sensor_traffic's
# own date column so the merge keys share a dtype.
sensor_traffic['date'] = pd.to_datetime(sensor_traffic['date'], infer_datetime_format=True)
# Mesh Covid and foot traffic datasets (inner join on calendar date).
sensor_covid = pd.merge(sensor_traffic, ds_hub, on='date')
#see results
print(sensor_covid.info(),'\n')
<class 'pandas.core.frame.DataFrame'> Int64Index: 1391047 entries, 0 to 1391046 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 1391047 non-null object 1 date_time 1391047 non-null object 2 year 1391047 non-null int32 3 month 1391047 non-null object 4 mdate 1391047 non-null int32 5 day 1391047 non-null object 6 time 1391047 non-null int32 7 sensor_id 1391047 non-null int32 8 sensor_name 1391047 non-null object 9 hourly_counts 1391047 non-null object 10 date 1391047 non-null datetime64[ns] 11 dow 1391047 non-null int64 12 day_counts 1391047 non-null object 13 night_counts 1391047 non-null object 14 hod 1391047 non-null int32 15 confirmed 1391047 non-null int64 16 administrative_area_level_2 1391047 non-null object 17 confirmed_log 1391047 non-null float64 dtypes: datetime64[ns](1), float64(1), int32(5), int64(2), object(9) memory usage: 175.1+ MB None
sensor_covid.head(5).T
| 0 | 1 | 2 | 3 | 4 | |
|---|---|---|---|---|---|
| id | 10705 | 10706 | 10707 | 10708 | 10709 |
| date_time | 2009-05-29T11:00:00.000 | 2009-05-29T11:00:00.000 | 2009-05-29T11:00:00.000 | 2009-05-29T11:00:00.000 | 2009-05-29T11:00:00.000 |
| year | 2009 | 2009 | 2009 | 2009 | 2009 |
| month | May | May | May | May | May |
| mdate | 29 | 29 | 29 | 29 | 29 |
| day | Friday | Friday | Friday | Friday | Friday |
| time | 11 | 11 | 11 | 11 | 11 |
| sensor_id | 6 | 8 | 9 | 10 | 11 |
| sensor_name | Flinders Street Station Underpass | Webb Bridge | Southern Cross Station | Victoria Point | Waterfront City |
| hourly_counts | 1105 | 48 | 292 | 239 | 129 |
| date | 2020-01-25 00:00:00 | 2020-01-25 00:00:00 | 2020-01-25 00:00:00 | 2020-01-25 00:00:00 | 2020-01-25 00:00:00 |
| dow | 4 | 4 | 4 | 4 | 4 |
| day_counts | 1105 | 48 | 292 | 239 | 129 |
| night_counts | 0 | 0 | 0 | 0 | 0 |
| hod | 11 | 11 | 11 | 11 | 11 |
| confirmed | 1 | 1 | 1 | 1 | 1 |
| administrative_area_level_2 | Victoria | Victoria | Victoria | Victoria | Victoria |
| confirmed_log | -inf | -inf | -inf | -inf | -inf |
sensor_covid.corr()
| year | mdate | time | sensor_id | dow | hod | confirmed | confirmed_log | |
|---|---|---|---|---|---|---|---|---|
| year | 1.000000 | 0.000066 | 0.020139 | 0.538040 | -0.006268 | 0.020139 | 0.009879 | 0.011492 |
| mdate | 0.000066 | 1.000000 | -0.002222 | 0.002976 | 0.010044 | -0.002222 | -0.014174 | -0.068916 |
| time | 0.020139 | -0.002222 | 1.000000 | 0.009223 | 0.003886 | 1.000000 | 0.003325 | 0.004837 |
| sensor_id | 0.538040 | 0.002976 | 0.009223 | 1.000000 | -0.005041 | 0.009223 | 0.021071 | 0.017593 |
| dow | -0.006268 | 0.010044 | 0.003886 | -0.005041 | 1.000000 | 0.003886 | 0.014394 | -0.016870 |
| hod | 0.020139 | -0.002222 | 1.000000 | 0.009223 | 0.003886 | 1.000000 | 0.003325 | 0.004837 |
| confirmed | 0.009879 | -0.014174 | 0.003325 | 0.021071 | 0.014394 | 0.003325 | 1.000000 | 0.488072 |
| confirmed_log | 0.011492 | -0.068916 | 0.004837 | 0.017593 | -0.016870 | 0.004837 | 0.488072 | 1.000000 |
## Build the traffic/covid analysis set: restrict to 2018 onwards, then
## average hourly counts and the covid log measure per sensor/day/hour.
ds_sensor_covid = sensor_covid[sensor_covid['year'] >= 2018]
group_cols = ['sensor_id', 'sensor_name', 'mdate', 'month', 'time', 'year']
sensor_covid_avg = ds_sensor_covid.groupby(group_cols, as_index=False).agg(
    {'hourly_counts': 'mean', 'confirmed_log': 'mean'}
)
# Display the choropleth map: CLUE blocks shaded by bar/tavern/pub patron
# capacity (darker = more patrons). Per-year scatter overlays are added
# to this same figure in the next cell before it is shown.
fig = px.choropleth_mapbox(
df_btp_capacity, #dataset
geojson=clueblocks, #CLUE Block spatial data
locations='block_id', # joined against featureidkey below
color='number_of_patrons',
color_continuous_scale=["#FFFF88", "yellow", "orange", "orange",
"darkorange", "red", "darkred"], #colour scale
range_color=(0, df_btp_capacity['number_of_patrons'].max()), #range for the colour scale
featureidkey="properties.block_id", #polygon identifier from the GeoJSON data
mapbox_style="carto-positron", #map style
zoom=13.25, #zoom level
center = {"lat": -37.81216592937499, "lon": 144.961812290625}, # set the map centre coordinates on Melbourne
opacity=0.7,
hover_name='clue_small_area', #title of the pop up box
hover_data={'block_id':True,'number_of_patrons':True}, #values to display in the popup box
labels={'number_of_patrons':'Number of Patrons','block_id':'Block Id'},
title='Bar, tavern, pub patron capacity', #Title for plot
width=950, height=800 #dimensions of plot in pixels
)
# The three census-year overlays differed only in source frame, marker
# colour, and opacity — build them with one helper instead of three
# copy-pasted px.scatter_mapbox calls.
def _patron_year_layer(df_year, colour, layer_opacity):
    """Return a scatter-mapbox layer of patron capacity for one census year."""
    return px.scatter_mapbox(
        df_year, lat="y_coordinate", lon="x_coordinate",
        opacity=layer_opacity,
        hover_name='clue_small_area',  # the title of the hover pop up box
        hover_data={'census_year': True, 'block_id': True, 'number_of_patrons': True,
                    'y_coordinate': False, 'x_coordinate': False},  # values to display in the popup box
        color_discrete_sequence=[colour],
        labels={'number_of_patrons': 'Number of Patrons', 'block_id': 'Block Id',
                'census_year': 'Census Year'},
    )

fig1 = _patron_year_layer(df_btp_capacity_y1, 'plum', 0.7)     # 2018
fig2 = _patron_year_layer(df_btp_capacity_y2, 'green', 0.7)    # 2019
fig3 = _patron_year_layer(df_btp_capacity_y3, 'purple', 0.75)  # 2020
# Overlay each year's venues on the patron-capacity choropleth and render.
fig.add_trace(fig1.data[0])
fig.add_trace(fig2.data[0])
fig.add_trace(fig3.data[0])
fig.update_geos(fitbounds="locations", visible=True)
fig.show()
# Display the choropleth map: CLUE blocks shaded by cafe/restaurant/bistro
# seat counts. Per-year scatter overlays are added in the next cell.
fig = px.choropleth_mapbox(
df_crb, #dataset
geojson=clueblocks, #CLUE Block spatial data
locations='block_id',
color='number_of_seats',
color_continuous_scale=["lightyellow","yellow", "orange",
"darkorange", "red", "darkred"], # colour scale
range_color=(0, df_crb['number_of_seats'].max()), #range for the colour scale
featureidkey="properties.block_id", #polygon identifier from the GeoJSON data
mapbox_style="carto-positron", # map style
zoom=12.25, # set the zoom level
center = {"lat": -37.81216592937499, "lon": 144.961812290625}, # set the map centre coordinates on Melbourne
opacity=0.7,
hover_name='clue_small_area', # the title of the hover pop up box
hover_data={'block_id':True,'number_of_seats':True,'seating_type':True}, #values to display in the popup box
# NOTE(review): color_discrete_sequence appears to have no effect here
# because `color` is a continuous column — confirm and remove if so.
color_discrete_sequence=['green'],
labels={'number_of_seats':'Number of Seats','block_id':'Block Id',
'seating_type':'Seating Type'}, # defines labels for
title='Cafe, restaurant, bistro seats', # Title for plot
width=950, height=800 # dimensions of plot in pixels
)
# The two census-year overlays differed only in source frame and marker
# colour — build them with one helper instead of duplicated calls.
def _seat_year_layer(df_year, colour):
    """Return a scatter-mapbox layer of cafe/restaurant seats for one census year."""
    return px.scatter_mapbox(
        df_year, lat="y_coordinate", lon="x_coordinate",
        opacity=0.7,
        hover_name='clue_small_area',  # the title of the hover pop up box
        hover_data={'census_year': True, 'block_id': True, 'number_of_seats': True,
                    'seating_type': True, 'y_coordinate': False,
                    'x_coordinate': False},  # values to display in the popup box
        color_discrete_sequence=[colour],
        labels={'number_of_seats': 'Number of Seats', 'block_id': 'Block Id',
                'seating_type': 'Seating Type', 'census_year': 'Census Year'},
    )

fig1 = _seat_year_layer(df_crb_y2, 'green')   # 2019
fig2 = _seat_year_layer(df_crb_y3, 'purple')  # 2020
# Overlay both years on the seats choropleth and render.
fig.add_trace(fig1.data[0])
fig.add_trace(fig2.data[0])
fig.update_geos(fitbounds="locations", visible=False)
fig.show()
# Re-inspect the planned-works frame with all derived date columns.
df_capw.head()
| start_date | activity_id | end_date | classification | geometry | start_dt | start_year | start_month | end_dt | end_year | end_month | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-04-25T00:00:00 | SS-1076969-0-110168-ECW-Consent Extended-25042... | 2022-06-05T00:00:00 | Structures | MULTIPOLYGON (((144.95675 -37.81092, 144.95682... | 2022-04-25 | 2022 | 4 | 2022-06-05 | 2022 | 6 |
| 1 | 2022-01-01T00:00:00 | WO-728357-1554230-1554230-61-Closed-0101202200... | 2022-06-30T00:00:00 | Traffic Management | MULTIPOLYGON (((144.96154 -37.80669, 144.96155... | 2022-01-01 | 2022 | 1 | 2022-06-30 | 2022 | 6 |
| 2 | 2022-05-15T00:00:00 | RP-59095-193964-21212-COU-Confirmed-1505202200... | 2022-05-15T00:00:00 | Reserved Parking | MULTIPOLYGON (((144.94331 -37.79619, 144.94434... | 2022-05-15 | 2022 | 5 | 2022-05-15 | 2022 | 5 |
| 3 | 2022-05-07T00:00:00 | SS-1102147-0-107153-ECW-Consent Extended-07052... | 2022-06-02T00:00:00 | Structures | MULTIPOLYGON (((144.97414 -37.79680, 144.97405... | 2022-05-07 | 2022 | 5 | 2022-06-02 | 2022 | 6 |
| 4 | 2022-03-31T00:00:00 | SS-1108795-0-100732-EHD-Permit Issued-31032022... | 2022-07-30T00:00:00 | Structures | MULTIPOLYGON (((144.96157 -37.80292, 144.96137... | 2022-03-31 | 2022 | 3 | 2022-07-30 | 2022 | 7 |
# The map shows the 'geometry' polygons, to identify which sensor locations are impacted
# by the planned activity and event
outline1 = {'fillColor': 'indigo', 'color': 'purple'}
#Create a map object centered on Melbourne
# (renamed from `map`, which shadowed the Python builtin)
activity_map = folium.Map(location=[-37.81216592937499,
                                    144.961812290625],
                          zoom_start=13.5)
#Add the current events to the map as a toggleable layer
folium.GeoJson(data=df_capw.geometry, style_function=lambda x: outline1,
               name='Planned Activity and Works').add_to(activity_map)
folium.LayerControl().add_to(activity_map)
# Last expression: render the map inline in the notebook.
activity_map
Charts
Jack: Top locations by seats outdoors and indoors Top locations by number of patrons Top entertainment locations with activities in 2022
Barkha: What is the pedestrian traffic for these locations, also day and night Are the venues busier in the evenings Are any venues busier in the day
Bonus: Can we make a selection to see breakdown for a location dropdown list?
Or focus on the red areas for venues example Docklands
Add narrative
#Model for forecasting location growth using datasets loaded earlier
#Todo:
#Initial linear regression, compare to another model LSTM
#Techniques for controlling jittering
#Normalise data to 28 day period per month example 28/31 * measure
# eg: 28/31 * pedestrian count
#look at lower level of grain by month, dow and hod
City of Melbourne Open Data Team, 2014 - 2021,'Bar, tavern, pub patron capacity 2020', City of Melbourne, date retrieved 26 Nov 2022, https://data.melbourne.vic.gov.au/Business/Bar-tavern-pub-patron-capacity-2020/9hjf-8i2d
City of Melbourne Open Data Team, 2014 - 2021,'Cafe, restaurant, bistro seats 2020', City of Melbourne, date retrieved 26 Nov 2022, https://data.melbourne.vic.gov.au/Business/Cafe-restaurant-bistro-seats-2020/dyqx-cfn5
City of Melbourne Open Data Team, 2014 - 2021,'City Activities and Planned Works', City of Melbourne, date retrieved 26 Nov 2022, https://data.melbourne.vic.gov.au/Events/City-Activities-and-Planned-Works/txcy-uafv
City of Melbourne Open Data Team, 2014 - 2021,'Pedestrian Counting System - Monthly (counts per hour)', City of Melbourne, date retrieved 03 Dec 2022, https://dev.socrata.com/foundry/data.melbourne.vic.gov.au/b2ak-trbp
City of Melbourne Open Data Team, 2014 - 2021,'Pedestrian Counting System - Sensor Locations', City of Melbourne, date retrieved 03 Dec 2022, https://data.melbourne.vic.gov.au/Transport/Pedestrian-Counting-System-Sensor-Locations/h57g-5234
O'Brien J, et al., 2020, 'Covid 19 in Australia', covid19data.com.au, date retrieved 03 Dec 2022, https://www.covid19data.com.au/
#save notebook, required so that step to convert to html, writes latest results to file
#adapt for other OS, this is for Windows
# NOTE(review): simulating Ctrl+S via the `keyboard` package is fragile —
# it requires the notebook window to hold focus (and elevated privileges
# on some platforms); consider `jupyter nbconvert --execute` instead.
keyboard.press_and_release('ctrl+s')
!jupyter nbconvert usecase_entertainment_location_projections.ipynb --to html --log-level WARN